rm(list = ls())

if (!require("pacman")) install.packages("pacman")
pacman::p_load(labelled, magrittr, tidyverse, readxl)

# Load and prepare data
polbar <- readRDS("data/pb_2016_clean.rds") %>%
    dplyr::rename(male_bin = male, east_bin = east) %>%
    mutate(
        men = ifelse(male_bin == 1, "Men", "Women"),
        east = ifelse(east_bin == 1, "East Germany", "West Germany")
    ) %>%
    filter(!yob == -4)

# Extract and process age category labels
labs <- polbar$age_categ %>%
    val_labels() %>%
    data.frame(val = ., lab = names(.), row.names = NULL, stringsAsFactors = F) %>%
    filter(!is.na(val)) %>%
    mutate(yob = -(val - 6))

# Convert age ranges to birth cohorts
to_cohort <- function(string) {
    str_extract_all(string, "(^|[^0-9])[1-9][0-9]($|[^0-9])") %>%
        unlist() %>%
        str_squish() %>%
        as.numeric() %>%
        magrittr::subtract(e2 = 2016) %>%
        magrittr::multiply_by(-1) %>%
        as.character() %>%
        sort(decreasing = F) %>%
        paste0(collapse = "-")
}

labs$cohort_lab <- sapply(labs$lab, to_cohort)

# Merge cohort labels with main data
polbar <- polbar %>%
    left_join(labs %>% dplyr::select(yob, cohort_lab)) %>%
    mutate(men_east = ifelse(men == "Men", "East German men", "East German women"))

# Labels
m <- structure(list(
    variable = "sat_mw", use_as_outcome = 1, use_as_outcome2 = 1,
    label = "Satisfaction with market economy", `original item` = "Was meinen Sie zur sozialen Marktwirtschaft in Deutschland? Sind Sie damit …eher zufrieden?"
), row.names = c(
    NA,
    -1L
), class = "data.frame")

# Plot function
outcome_list <- "sat_mw"

pf <- function(outcome_list) {
    # Prepare data for plotting
    polbar_temp <- polbar %>%
        filter(east_bin == 1) %>%
        dplyr::select(men_east, yob, one_of(outcome_list)) %>%
        pivot_longer(
            cols = one_of(outcome_list),
            values_to = "o", names_to = "variable"
        ) %>%
        filter(!is.na(o)) %>%
        left_join(m %>% dplyr::select(variable, label))

    # Calculate conditional means
    cmeans <- polbar_temp %>%
        group_by(men_east, yob, variable) %>%
        summarise(m = mean(o, na.rm = T), n = n(), .groups = "drop") %>%
        left_join(labs %>% dplyr::select(yob, cohort_lab)) %>%
        left_join(m %>% dplyr::select(variable, label))

    # Create plot
    ggplot(polbar_temp, aes(x = yob, y = o, color = men_east)) +
        geom_point(data = cmeans, aes(x = yob, y = m)) +
        geom_vline(xintercept = c(-0.5), linetype = "dotted") +
        geom_smooth(
            data = polbar_temp,
            aes(x = yob, group = yob > -1, y = o),
            method = "lm"
        ) +
        theme_bw(base_size = 16) +
        ylab("Satisfaction with market economy") +
        xlab("Cohort group") +
        facet_wrap(~men_east) +
        scale_color_manual(values = c("black", "grey")) +
        scale_x_continuous(
            breaks = unique(cmeans$yob),
            labels = unique(cmeans$cohort_lab)
        ) +
        theme(legend.position = "none")
}

p1 <- pf(outcome_list)
p1
